#!/bin/sh
#############################################################################
#  Copyright (C) 2013-2015 Lawrence Livermore National Security, LLC.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Albert Chu <chu11@llnl.gov>
#  LLNL-CODE-644248
#
#  This file is part of Magpie, scripts for running Hadoop on
#  traditional HPC systems.  For details, see https://github.com/llnl/magpie.
#
#  Magpie is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  Magpie is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Magpie.  If not, see <http://www.gnu.org/licenses/>.
#############################################################################

############################################################################
# Flux Customizations
############################################################################

# Node count.  Node count should include one node for the
# head/management/master node.  For example, if you want 8 compute
# nodes to process data, specify 9 nodes below.
#
# If including Zookeeper, include expected Zookeeper nodes.  For
# example, if you want 8 Hadoop compute nodes and 3 Zookeeper nodes,
# specify 12 nodes (1 master, 8 Hadoop, 3 Zookeeper)
#
# Also take into account additional nodes needed for other services.
#
# Many of the below can be configured on the command line.  If you are
# more comfortable specifying these on the command line, feel free to
# delete the customizations below.

# flux: --nodes=<my_node_count>
# flux: --output="flux-{{id}}.out"

# Note defaults of MAGPIE_STARTUP_TIME & MAGPIE_SHUTDOWN_TIME, this
# timelimit should be a fair amount larger than them combined.
# flux: --time-limit=<my_time_in_minutes>

# Job name.  This will be used in naming directories for the job.
# flux: --job-name=<my_job_name>

# Queue to launch job in
# flux: --queue=<my_queue>

## Flux Values
# Generally speaking, don't touch the following, misc other configuration

# flux: --exclusive

# flux: --broker-opts=-Sbroker.critical-ranks=0
# flux: --broker-opts=-Stbon.topo=kary:0
# flux: -o exit-timeout=none

# Need to tell Magpie how you are submitting this job
export MAGPIE_SUBMISSION_TYPE="fluxbatchrun"


############################################################################
# Magpie Configurations
############################################################################

# Directory your launching scripts/files are stored
#
# Normally an NFS mount, someplace magpie can be reached on all nodes.
export MAGPIE_SCRIPTS_HOME="${HOME}/magpie"

# Path to store data local to each cluster node, typically something
# in /tmp.  This will store local conf files and log files for your
# job.  If local scratch space is not available, consider using the
# MAGPIE_NO_LOCAL_DIR option.  See README for more details.
#
export MAGPIE_LOCAL_DIR="/tmp/${USER}/magpie"

# Magpie job type
#
# "storm" - Run a job according to the settings of STORM_JOB.
#
# "zookeeper" - Run a job according to the settings of ZOOKEEPER_JOB.
#
# "testall" - Run a job that runs all basic sanity tests for all
#             software that is configured to be setup.  This is a good
#             way to sanity check that everything has been setup
#             correctly and the way you like.
#
#             For Storm, testall will run stormwordcount
#             For Zookeeper, testall will run zookeeperruok
#
# "script" - Run arbitraty script, as specified by MAGPIE_JOB_SCRIPT.
#            You can find example job scripts in examples/.
#
# "interactive" - manually interact with job run to submit jobs,
#                 peruse data (e.g. HDFS), move data, etc.  See job
#                 output for instructions to access your job
#                 allocation.
#
# "setuponly" - do not launch any daemons or services, only setup
#               configuration files.  Useful for debugging or
#               development.
#
export MAGPIE_JOB_TYPE="storm"

# Specify script and arguments to execute for "script" mode in
# MAGPIE_JOB_TYPE
#
# export MAGPIE_JOB_SCRIPT="${HOME}/my-job-script"

# Specify script startup / shutdown time window
#
# Specifies the amount of time to give startup / shutdown activities a
# chance to succeed before Magpie will give up (or in the case of
# shutdown, when the resource manager/scheduler may kill the running
# job).  Defaults to 30 minutes for startup, 30 minutes for shutdown.
#
# The startup time in particular may need to be increased if you have
# a large amount of data.  As an example, HDFS may need to spend a
# significant amount of time determine all of the blocks in HDFS
# before leaving safemode.
#
# The stop time in particular may need to be increased if you have a
# large amount of cleanup to be done.  HDFS will save its NameSpace
# before shutting down.  Hbase will do a compaction before shutting
# down.
#
# The startup & shutdown window must together be smaller than the
# timelimit specified for the job.
#
# MAGPIE_STARTUP_TIME and MAGPIE_SHUTDOWN_TIME at minimum must be 5
# minutes.  If MAGPIE_POST_JOB_RUN is specified below,
# MAGPIE_SHUTDOWN_TIME must be at minimum 10 minutes.
#
# export MAGPIE_STARTUP_TIME=30
# export MAGPIE_SHUTDOWN_TIME=30

# Magpie One Time Run
#
# Normally, Magpie assumes that when a user runs a job, data created
# and stored within that job will be desired to be accessed again.  For
# example, data created and stored within HDFS will be accessed again.
#
# Under a number of scenarios, this may not be desired.  For example
# during testing.
#
# To improve useability and performance, setting MAGPIE_ONE_TIME_RUN
# below to yes will have two effects on the Magpie job.
#
# 1) A number of data paths (such as for HDFS) will be put into unique
#    paths for this job.  Therefore, no other job should be able to
#    access the data again.  This is particularly useful if you wish
#    to run performance tests with this job script over and over
#    again.
#
#    Magpie will not remove data that was written, so be sure to clean up
#    your directories later.
#
# 2) In order to improve job throughout, Magpie will take shortcuts by
#    not properly tearing down the job.  As data corruption should not be
#    a concern on job teardown, the job can complete more quickly.
#
# export MAGPIE_ONE_TIME_RUN=yes

# Convenience Scripts
#
# Specify script to be executed to before / after your job.  It is run
# on all nodes.
#
# Typically the pre-job script is used to set something up or get
# debugging info.  It can also be used to determine if system
# conditions meet the expectations of your job.  The primary job
# running script (magpie-run) will not be executed if the
# MAGPIE_PRE_JOB_RUN exits with a non-zero exit code.
#
# The post-job script is typically used for cleaning up something or
# gathering info (such as logs) for post-debugging/analysis.  If it is
# set, MAGPIE_SHUTDOWN_TIME above must be > 5.
#
# See example magpie-example-pre-job-script and
# magpie-example-post-job-script for ideas of what you can do w/ these
# scripts
#
# Multiple scripts can be specified separated by comma.  Arguments can
# be passed to scripts as well.
#
# A number of convenient scripts are available in the
# ${MAGPIE_SCRIPTS_HOME}/scripts directory.
#
# export MAGPIE_PRE_JOB_RUN="${MAGPIE_SCRIPTS_HOME}/scripts/pre-job-run-scripts/my-pre-job-script"
# export MAGPIE_POST_JOB_RUN="${MAGPIE_SCRIPTS_HOME}/scripts/post-job-run-scripts/my-post-job-script"
#
# Similar to the MAGPIE_PRE_JOB_RUN and MAGPIE_POST_JOB_RUN, scripts can be
# run after the stack is setup but prior to the script or interactive mode
# begins. This enables frontends and other processes that depend on the stack
# to be started up and torn down. In similar fashion the cleanup will be done
# immediately after the script or interactive mode exits before the stack is
# shutdown.
#
# export MAGPIE_PRE_EXECUTE_RUN="${MAGPIE_SCRIPTS_HOME}/scripts/pre-job-run-scripts/my-pre-job-script"
# export MAGPIE_POST_EXECUTE_RUN="${MAGPIE_SCRIPTS_HOME}/scripts/post-job-run-scripts/my-post-job-script"

# Environment Variable Script
#
# When working with Magpie interactively by logging into the master
# node of your job allocation, many environment variables may need to
# be set.  For example, environment variables for config file
# directories (e.g. HADOOP_CONF_DIR, HBASE_CONF_DIR, etc.) and home
# directories (e.g. HADOOP_HOME, HBASE_HOME, etc.) and more general
# environment variables (e.g. JAVA_HOME) may need to be set before you
# begin interacting with your big data setup.
#
# The standard job output from Magpie provides instructions on all the
# environment variables typically needed to interact with your job.
# However, this can be tedious if done by hand.
#
# If the environment variable specified below is set, Magpie will
# create the file and put into it every environment variable that
# would be useful when running your job interactively.  That way, it
# can be sourced easily if you will be running your job interactively.
# It can also be loaded or used by other job scripts.
#
# export MAGPIE_ENVIRONMENT_VARIABLE_SCRIPT="${HOME}/my-job-env"

# Environment Variable Shell Type
#
# Magpie outputs environment variables in help output and
# MAGPIE_ENVIRONMENT_VARIABLE_SCRIPT based on your SHELL environment
# variable.
#
# If you would like to output in a different shell type (perhaps you
# have programmed scripts in a different shell), specify that shell
# here.
#
# export MAGPIE_ENVIRONMENT_VARIABLE_SCRIPT_SHELL="/bin/bash"

# Hostname config
#
# Magpie internally assumes that the nodenames provided by the
# scheduler/resource manager are the addresses that should be used for
# configuration AND they are identical to the output of the `hostname`
# command, which is used by Magpie to determine what nodes individual
# services should run on.
#
# If this is not true in your environment, you can provide an alternate
# hostname command below to correct this.  Very often, users may need to
# set:
#
# MAGPIE_HOSTNAME_CMD="hostname -s" // use short hostname
# or
# MAGPIE_HOSTNAME_CMD="hostname -f" // use FQDN
#
# If you have a more complex situation, see README.hostname for more
# advanced options.
#
# export MAGPIE_HOSTNAME_CMD="hostname"

# Remote Shell
#
# Magpie requires a passwordless remote shell command to launch
# necessary daemons across your job allocation.  Magpie defaults to
# ssh, but it may be an alternate command in some environments.  An
# alternate ssh-equivalent remote command can be specified by setting
# MAGPIE_REMOTE_CMD below.
#
# If using ssh, Magpie requires keys to be setup ahead of time so it
# can be executed without passwords.
#
# Specify options to the remote shell command if necessary.
#
# export MAGPIE_REMOTE_CMD="ssh"
# export MAGPIE_REMOTE_CMD_OPTS=""

############################################################################
# General Configuration
############################################################################

# Necessary for most projects
export JAVA_HOME="/usr/lib/jvm/jre-1.7.0/"

############################################################################
# Storm Core Configurations
############################################################################

# Should Storm be run
#
# Specify yes or no.  Defaults to no.
#
export STORM_SETUP=yes

# Version
#
export STORM_VERSION="1.2.3"

# Path to your Storm build/binaries
#
# This should be accessible on all nodes in your allocation. Typically
# this is in an NFS mount.
#
export STORM_HOME="${HOME}/apache-storm-${STORM_VERSION}"

# Path to store data local to each cluster node, typically something
# in /tmp.  This will store local conf files and log files for your
# job.  If local scratch space is not available, consider using the
# MAGPIE_NO_LOCAL_DIR option.  See README for more details.
#
export STORM_LOCAL_DIR="/tmp/${USER}/storm"

# Directory where alternate Storm configuration templates are stored
#
# If you wish to tweak the configuration files used by Magpie, set
# STORM_CONF_FILES below, copy configuration templates from
# $MAGPIE_SCRIPTS_HOME/conf/storm into STORM_CONF_FILES, and modify as
# you desire.  Magpie will still use configuration files in
# $MAGPIE_SCRIPTS_HOME/conf/storm if any of the files it needs are not
# found in STORM_CONF_FILES.
#
# export STORM_CONF_FILES="${HOME}/myconf"

# Storm Supervisor Slots
#
# Specify the number of slots/workers per supervisor/worker shall run.
#
# If not specified, defaults to half the number of processors on the system.
#
# export STORM_SUPERVISOR_SLOTS="16"

# Daemon Heap Max
#
# Heap maximum for Storm daemons, specified in megs.
#
# If not specified, defaults to 1024
#
# May need to be increased if you are scaling large, get OutofMemory
# errors, or perhaps have a lot of cores on a node.
#
# export STORM_DAEMON_HEAP_MAX=1024

# Worker Heap Max
#
# Heap maximum for Storm workers, specified in megs.
#
# If not specified, defaults to 1024
#
# May need to be increased if you are scaling large, get OutofMemory
# errors, or perhaps have a lot of cores on a node.
#
# export STORM_WORKER_HEAP_MAX=1024

############################################################################
# Storm Job/Run Configurations
############################################################################

# Set storm job for MAGPIE_JOB_TYPE = storm
#
# "stormwordcount" - run Storm word count example.  Useful for making
#            sure things are setup the way you like.
#
#            Note that there is no "output" from stormwordcount, it is
#            stored in Storm log files.  Consider using the
#            magpie-gather-config-files-and-logs-script.sh to gather
#            those log files.
#
export STORM_JOB="stormwordcount"
############################################################################
# Zookeeper Configurations
############################################################################

# Should Zookeeper be run
#
# Specify yes or no.  Defaults to no.
#
export ZOOKEEPER_SETUP=yes

# Zookeeper Replication Count
#
# Recommended to be odd.
#
export ZOOKEEPER_REPLICATION_COUNT=3

# Zookeeper Node Sharing
#
# By default, Zookeeper will not run on nodes that will run Hadoop/Hbase.
# They will have dedicated nodes for themselves.  If you do not wish
# for this to be the case, set the below to 'yes'.  Defaults to no.
#
# Keep in mind that adjustments to the number of nodes in your
# allocation may need to be adjusted given your setting of this
# parameter.  For example, if you want 8 nodes for Hadoop processing,
# you should increase your allocation by ZOOKEEPER_REPLICATION_COUNT
# if the below is 'no'.
#
# export ZOOKEEPER_SHARE_NODES=yes

# Set zookeeper job for MAGPIE_JOB_TYPE = zookeeper
#
# "zookeeperruok" - Run a quick sanity test to see that zookeeper is
#             setup correctly.  zookeeperruok will do a simple 'ruok'
#             to all Zookeeper daemons.
#
export ZOOKEEPER_JOB="zookeeperruok"

# Zookeeper Version
#
export ZOOKEEPER_VERSION="3.4.14"

# Path to your Zookeeper build/binaries
#
# This should be accessible on all nodes in your allocation. Typically
# this is in an NFS mount.
#
export ZOOKEEPER_HOME="${HOME}/zookeeper-${ZOOKEEPER_VERSION}"

# Directory where alternate Zookeeper configuration templates are stored
#
# If you wish to tweak the configuration files used by Magpie, set
# ZOOKEEPER_CONF_FILES below, copy configuration templates from
# $MAGPIE_SCRIPTS_HOME/conf/zookeeper into ZOOKEEPER_CONF_FILES, and
# modify as you desire.  Magpie will still use configuration files in
# $MAGPIE_SCRIPTS_HOME/conf/zookeeper if any of the files it needs are not
# found in ZOOKEEPER_CONF_FILES.
#
# export ZOOKEEPER_CONF_FILES="${HOME}/myconf"

# Path base for zookeeper data to be stored on each cluster node
#
# ZOOKEEPER_DATA_DIR can point to either a network file system path or
# a local drive path.
#
# If a local drive or SSD/NVRAM is available, a local path is
# preferable.  If set to local, please see ZOOKEEPER_DATA_DIR_TYPE
# below for optimization possibilties.
#
export ZOOKEEPER_DATA_DIR="/lustre/${USER}/zookeeper"

# Zookeeper cleanup
#
# After your job has completed, if ZOOKEEPER_DATA_DIR_CLEAR is set to
# yes, Magpie will do a rm -rf on ZOOKEEPER_DATA_DIR.  This may be
# convenient for cleaning up your job after it has run.  This is
# particularly useful if ZOOKEEPER_DATA_DIR is on a local ssd /drive.
# B/c on your next job run, you may not be able to get the nodes you
# want on your next run, leading to problems.
#
# export ZOOKEEPER_DATA_DIR_CLEAR="yes"

# Zookeeper data dir type
#
# Inform Magpie what type of directory ZOOKEEPER_DATA_DIR points to.
#
# This configuration isn't entirely necessary to be set, but if set to
# networkfs, Magpie will increase a number of default timeouts in
# Zookeeper as well as other projects to adjust for the fact Zookeeper
# is running on a network file system.
#
# "networkfs" - ZOOKEEPER_DATA_DIR points to a network filesystem
#               (such as Lustre).
#
# "local" - ZOOKEEPER_DATA_DIR points to a local drive.
#
export ZOOKEEPER_DATA_DIR_TYPE="networkfs"

# Path to store data local to each cluster node, typically something
# in /tmp.  This will store local conf files and log files for your
# job.  If local scratch space is not available, consider using the
# MAGPIE_NO_LOCAL_DIR option.  See README for more details.
#
export ZOOKEEPER_LOCAL_DIR="/tmp/${USER}/zookeeper"

# ZooKeeper ticktime, measured in milliseconds.  Used by all of
# Zookeeper for time measurement.
#
# Defaults to 2000.
#
# export ZOOKEEPER_TICKTIME=2000

# ZooKeeper initLimit, multiple of ticks to allow followers to connect
# and sync to a leader.  May need to increase this value if the data
# managed by ZooKeeper is large.
#
# Defaults to 10 if ZOOKEEPER_DATA_DIR_TYPE is local, 20 if networkfs
#
# export ZOOKEEPER_INITLIMIT=10

# ZooKeeper syncLimit, multiple of ticks to allow followers to sync
# with ZooKeeper.  If they fall too far behind a leader, they will be
# dropped.
#
# Defaults to 5 if ZOOKEEPER_DATA_DIR_TYPE is local, 10 if networkfs
#
# export ZOOKEEPER_SYNCLIMIT=5

############################################################################
# Run Job
############################################################################

nnodes=`flux resource list -no "{nnodes}"`

flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-check-inputs
if [ $? -ne 0 ]
then
    exit 1
fi
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-setup-core
if [ $? -ne 0 ]
then
    exit 1
fi
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-setup-projects
if [ $? -ne 0 ]
then
    exit 1
fi
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-setup-post
if [ $? -ne 0 ]
then
    exit 1
fi
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-pre-run
if [ $? -ne 0 ]
then
    exit 1
fi
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-run
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-cleanup
flux run -N${nnodes} --tasks-per-node=1 -o exit-timeout=none $MAGPIE_SCRIPTS_HOME/magpie-post-run
